* ============================================================================ *
* PROJECT:		MAKING COUNTRIES SMALL: THE NATIONALIZATION OF DISTRICTS
* JOURNAL: 		Political Science Research and Methods
* OBJECTIVE:		Replicating results. Constituency-level analysis
* AUTHOR: 		Ignacio Lago
* DATE:			2022-12-27
* ============================================================================ *


* Session set-up: start from an empty session, point Stata at the replication
* folder and load the constituency-level data.
clear all
global beg_path `"C:\XXX\XXX\XXX\Lago_Replication\"' // Set the path of the working directory here 
set more off

* beg_path was previously defined but never used, so the dataset and every
* exported table/figure depended on whatever directory Stata happened to be in.
* Try to move there; if the path has not been edited (or does not exist) fall
* back to the current working directory so the script behaves as before.
capture cd `"$beg_path"'
if _rc {
    display as text "note: could not change to the folder stored in beg_path;"
    display as text "      continuing in the current working directory: " c(pwd)
}

*Load dataset
use "constituency_level.dta"

*****Data set-up
* Both variables below are derived from the numeric codes stored in -dummies-.

**South indicator: 1 for the eleven listed codes, 0 for everything else
**(including missing codes, exactly as a catch-all recode rule would do).
gen south = inlist(dummies, 3, 5, 9, 10, 14, 18, 20, 23, 25, 27, 29)

**Time zones: every code is mapped to 5, 6, 7 or 8; codes that are not listed
**(e.g. 48 and 49) and missing codes are left as missing.
gen timezones = .
replace timezones = 5 if inlist(dummies, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 24, 26, 27, 50, 51)
replace timezones = 6 if inlist(dummies, 18, 19, 20, 22, 23, 25, 28, 29, 30, 32, 34, 36, 42, 43, 45)
replace timezones = 7 if inlist(dummies, 37, 38, 40, 44, 46, 47)
replace timezones = 8 if inlist(dummies, 31, 33, 35, 39, 41)

* Attach a variable label and value labels so graphs/tables show "GMT-x".
label define timezones 5 "GMT-5" 6 "GMT-6" 7 "GMT-7" 8 "GMT-8"
label values timezones timezones
label variable timezones "timezones"


*Table 1
* Descriptive statistics (N, mean, sd, min, max) computed on the estimation
* sample of the baseline regression, first for all observations and then for
* the non-South (south==0). Codes 48 and 49 of -dummies- are always excluded.
*
* The regressor is written with its full name (logdist) instead of the
* abbreviation "logdis", so the script also runs with -set varabbrev off- and
* always refers to the same variable that is summarized below.
*
* How the sample is selected: -predict- stores fitted values only where
* e(sample)==1, leaving the variable missing elsewhere. Because Stata treats
* missing as larger than any number, "if j<10000" keeps exactly the
* observations with a non-missing fitted value (assuming fitted values stay
* below 10000), i.e. the estimation sample.

* All observations
reg ENP_cst c.logdist   vv1million   popmillion i.dummies if dummies!=48&dummies!=49, cluster(dummies)
predict j if e(sample)==1
estpost summarize ENP_cst logdist vv1million popmillion if j<10000
esttab using "table1all.rtf", replace cells("count(fmt(a2)) mean sd min max") label

* Non-South only
reg ENP_cst c.logdist   vv1million   popmillion i.dummies if dummies!=48&dummies!=49&south==0, cluster(dummies)
predict n if e(sample)==1
estpost summarize ENP_cst logdist vv1million popmillion if n<10000
esttab using "table1non-south.rtf", replace cells("count(fmt(a2)) mean sd min max") label

*Figure 1
* Lowess smooths over time of ENP_nat (all observations) and of ENP_cst
* (observations with ENP_cst<3; entire US and non-South), combined into one
* figure with the second cell of the grid left empty (holes(2)).
* -replace- is added to name() and saving() so the section can be re-run
* without failing because the in-memory graphs or the saved file already exist.
lowess ENP_nat yr, xlabel(1850(50)2000) ytitle("ENP_nat") title("") name(a3, replace)
lowess ENP_cst yr if ENP_cst<3, xlabel(1850(50)2000) ytitle("ENP_cst") title("Entire US") name(a1, replace)
lowess ENP_cst yr if ENP_cst<3 & south==0, xlabel(1850(50)2000) ytitle("ENP_cst") title("Non-South") name(a2, replace)
graph combine a3 a1 a2, holes(2) title("") saving("Figure1_Nationalization_United_States", replace)

*Figure 2
* Lowess smooth of ENP_cst over time (observations with ENP_cst<3), one panel
* per value of -timezones-. saving() now uses -replace- so that re-running the
* script overwrites the existing file instead of stopping with an error.
lowess ENP_cst yr if ENP_cst<3, ytitle("ENP_cst") title("") by(timezones) saving("Figure2_Nationalization_and_Distance", replace)

*Figure 3 
* Coefficient on logdist from regressions of ENP_cst on logdist, vv1million,
* popmillion and indicators for each value of -dummies-, with standard errors
* clustered on -dummies-. The sample grows cumulatively: each model uses all
* observations with yr below the cutoff. Codes 48 and 49 are always excluded.
*
* Changes with respect to the copy-pasted version:
*   - the eight regressions per panel are generated by a loop;
*   - the regressor is spelled out (logdist) rather than abbreviated, so it
*     matches keep(logdist) and works with -set varabbrev off-;
*   - name() and saving() use -replace- so the section can be re-run.
* The last cutoff is yr<2020 but is labelled "2016" on the axis
* (presumably the last year in the data - TODO confirm).
local cutoffs 1850 1875 1900 1925 1950 1975 2000 2020
local slots   c d e f h i j k

* Panel 1: entire US
forvalues s = 1/8 {
    local cut  : word `s' of `cutoffs'
    local slot : word `s' of `slots'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if yr<`cut'&dummies!=48&dummies!=49, cluster(dummies)
    estimates store `slot'
}
coefplot c d e f h i j k  , keep(logdist) yline(0) xlabel(0.61(0.11)1.38, grid) title("Entire US") ytitle("Coefficient on (the log of) distance") xlabel(0.61 "1850" 0.72 "1875" 0.83 "1900" 0.94 "1925" 1.06 "1950" 1.17 "1975" 1.28 "2000" 1.39 "2016")  legend(off) ciopts(lcolor(ebblue)) mcolor(ebblue) vert name(c1, replace)

* Panel 2: non-South (south==0); the stored results c-k are overwritten
forvalues s = 1/8 {
    local cut  : word `s' of `cutoffs'
    local slot : word `s' of `slots'
    reg ENP_cst c.logdist popmillion vv1million i.dummies if south==0&yr<`cut'&dummies!=48&dummies!=49, cluster(dummies)
    estimates store `slot'
}
coefplot c d e f h i j k  , keep(logdist) yline(0) xlabel(0.61(0.11)1.38, grid) title("Non-South") ytitle("Coefficient on (the log of) distance") xlabel(0.61 "1850" 0.72 "1875" 0.83 "1900" 0.94 "1925" 1.06 "1950" 1.17 "1975" 1.28 "2000" 1.39 "2016")  legend(off) ciopts(lcolor(ebblue)) mcolor(ebblue) vert name(c2, replace)

graph combine c1 c2, saving("Figure 3_The_Effect_of_the_Distance_to_Washington", replace)

*Figure 4 
* Same coefficient plot as Figure 3 on a restricted sample: only observations
* with yr>1849 and -dummies- below 32. Each model again uses all observations
* with yr below the cutoff; standard errors are clustered on -dummies-.
*
* Changes with respect to the copy-pasted version:
*   - the seven regressions per panel are generated by a loop;
*   - the regressor is spelled out (logdist) rather than abbreviated, so it
*     matches keep(logdist) and works with -set varabbrev off-;
*   - name() and saving() use -replace- so the section can be re-run.
* The last cutoff is yr<2020 but is labelled "2016" on the axis.
local cutoffs 1875 1900 1925 1950 1975 2000 2020
local slots   d e f h i j k

* Panel 1: entire US (restricted sample)
forvalues s = 1/7 {
    local cut  : word `s' of `cutoffs'
    local slot : word `s' of `slots'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if yr>1849&yr<`cut'&dummies<32, cluster(dummies)
    estimates store `slot'
}
coefplot d e f h i j k  , keep(logdist) yline(0) xlabel(0.625(0.125)1.375, grid) title("Entire US") ytitle("Coefficient on (the log of) distance") xlabel(0.625 "1875" 0.750 "1900" 0.875 "1925" 1.00 "1950" 1.125 "1975" 1.25 "2000" 1.375 "2016")  legend(off) ciopts(lcolor(ebblue)) mcolor(ebblue) vert name(c3, replace)

* Panel 2: non-South (restricted sample); stored results d-k are overwritten
forvalues s = 1/7 {
    local cut  : word `s' of `cutoffs'
    local slot : word `s' of `slots'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if south==0&yr>1849&yr<`cut'&dummies<32, cluster(dummies)
    estimates store `slot'
}
coefplot d e f h i j k  , keep(logdist) yline(0) xlabel(0.625(0.125)1.375, grid) title("Non-South") ytitle("Coefficient on (the log of) distance") xlabel(0.625 "1875" 0.750 "1900" 0.875 "1925" 1.00 "1950" 1.125 "1975" 1.25 "2000" 1.375 "2016")  legend(off) ciopts(lcolor(ebblue)) mcolor(ebblue) vert name(c4, replace)

graph combine c3 c4, saving("Figure 4_The Effect_of_the_Distance_to_Washington_with_a_Restricted_Sample", replace)



*Table 2
* One regression per time zone (timezones = 5, 6, 7, 8): ENP_cst on a linear
* trend in yr, vv1million, popmillion and indicators for each value of
* -dummies-, with standard errors clustered on -dummies-. Results are stored
* as m1-m4 with titles GMT-5 ... GMT-8 and exported with coefficients
* (stars, 4 decimals), standard errors in parentheses and R-squared.
* -replace- is added to estout so that re-running the script overwrites the
* existing file, as the esttab calls for Table 1 already do.
forvalues tz = 5/8 {
    local m = `tz' - 4
    reg ENP_cst c.yr vv1million popmillion  i.dummies if timezones==`tz', cluster(dummies)
    estimates store m`m', title(GMT-`tz')
}
estout m1 m2 m3 m4 using Table2_The_Effect_of_Distance_on_the_Number_of_Parties.text, cells(b(star fmt(4)) se(par fmt(4))) legend label stats(r2) varlabels(_cons Constant) replace


*Table A1
* Full regression output behind the "Entire US" coefficient plot: ENP_cst on
* logdist, vv1million, popmillion and indicators for each value of -dummies-,
* standard errors clustered on -dummies-, codes 48 and 49 excluded. Model s
* uses all observations with yr below the s-th cutoff and is stored as m<s>
* with the column title given in `titles' (the yr<2020 model is titled 2016).
*
* Changes with respect to the copy-pasted version: the regressions are
* generated by a loop, the regressor is written in full (logdist) so the
* script works with -set varabbrev off-, and estout uses -replace- so the
* table can be regenerated without deleting the old file by hand.
local cutoffs 1850 1875 1900 1925 1950 1975 2000 2020
local titles  1850 1875 1900 1925 1950 1975 2000 2016
forvalues s = 1/8 {
    local cut : word `s' of `cutoffs'
    local ttl : word `s' of `titles'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if yr<`cut'&dummies!=48&dummies!=49, cluster(dummies)
    estimates store m`s', title(`ttl')
}
estout m1 m2 m3 m4 m5 m6 m7 m8 using TableA1_The_Effect_of_Distance_on_the_Number_of_Parties_with_Entire_US.text, cells(b(star fmt(4)) se(par fmt(5))) legend label stats(r2) varlabels(_cons Constant) replace

*Table A2
* Full regression output behind the "Non-South" coefficient plot: same
* specification as the entire-US table but restricted to south==0. Model s
* uses all observations with yr below the s-th cutoff and is stored as m<s>
* with the column title given in `titles' (the yr<2020 model is titled 2016).
*
* Changes with respect to the copy-pasted version:
*   - the regressions are generated by a loop;
*   - the regressor is written in full (logdist) so the script works with
*     -set varabbrev off-;
*   - estout uses -replace- so the table can be regenerated;
*   - stats(r2) is added: this was the only exported regression table that
*     did not report R-squared. NOTE(review): drop stats(r2) again if the
*     published table intentionally omits it.
local cutoffs 1850 1875 1900 1925 1950 1975 2000 2020
local titles  1850 1875 1900 1925 1950 1975 2000 2016
forvalues s = 1/8 {
    local cut : word `s' of `cutoffs'
    local ttl : word `s' of `titles'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if south==0&yr<`cut'&dummies!=48&dummies!=49, cluster(dummies)
    estimates store m`s', title(`ttl')
}
estout m1 m2 m3 m4 m5 m6 m7 m8 using TableA2_The_Effect_of_Distance_on_the_Number_of_Parties_with_Non_South.text, cells(b(star fmt(4)) se(par fmt(5))) legend label stats(r2) varlabels(_cons Constant) replace

*Table A3
* Full regression output for the restricted sample (yr>1849 and -dummies-
* below 32), entire US: ENP_cst on logdist, vv1million, popmillion and
* indicators for each value of -dummies-, standard errors clustered on
* -dummies-. Model s uses all observations with yr below the s-th cutoff and
* is stored as m<s> with the column title given in `titles' (the yr<2020
* model is titled 2016).
*
* Changes with respect to the copy-pasted version: the regressions are
* generated by a loop, the regressor is written in full (logdist) so the
* script works with -set varabbrev off-, and estout uses -replace- so the
* table can be regenerated without deleting the old file by hand.
local cutoffs 1875 1900 1925 1950 1975 2000 2020
local titles  1875 1900 1925 1950 1975 2000 2016
forvalues s = 1/7 {
    local cut : word `s' of `cutoffs'
    local ttl : word `s' of `titles'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if yr>1849&yr<`cut'&dummies<32, cluster(dummies)
    estimates store m`s', title(`ttl')
}
estout m1 m2 m3 m4 m5 m6 m7 using TableA3_The_Effect_of_Distance_with_Restricted_Sample.text, cells(b(star fmt(4)) se(par fmt(5))) legend label stats(r2) varlabels(_cons Constant) replace


*Table A4
* Full regression output for the restricted sample (yr>1849 and -dummies-
* below 32), non-South only (south==0): ENP_cst on logdist, vv1million,
* popmillion and indicators for each value of -dummies-, standard errors
* clustered on -dummies-. Model s uses all observations with yr below the
* s-th cutoff and is stored as m<s> with the column title given in `titles'
* (the yr<2020 model is titled 2016).
*
* Changes with respect to the copy-pasted version: the regressions are
* generated by a loop, the regressor is written in full (logdist) so the
* script works with -set varabbrev off-, and estout uses -replace- so the
* table can be regenerated without deleting the old file by hand.
local cutoffs 1875 1900 1925 1950 1975 2000 2020
local titles  1875 1900 1925 1950 1975 2000 2016
forvalues s = 1/7 {
    local cut : word `s' of `cutoffs'
    local ttl : word `s' of `titles'
    reg ENP_cst c.logdist vv1million popmillion i.dummies if south==0&yr>1849&yr<`cut'&dummies<32, cluster(dummies)
    estimates store m`s', title(`ttl')
}
estout m1 m2 m3 m4 m5 m6 m7 using TableA4_The_Effect_of_Distance_on_the_Number_of_Parties_in_Non_South_with_Restricted_Sample.text, cells(b(star fmt(4)) se(par fmt(5))) legend label stats(r2) varlabels(_cons Constant) replace

*Table A5
* Time-zone regressions restricted to the non-South (south==0): for each
* value of -timezones- (5, 6, 7, 8), ENP_cst on a linear trend in yr,
* vv1million, popmillion and indicators for each value of -dummies-, with
* standard errors clustered on -dummies-. Results are stored as m1-m4 with
* titles GMT-5 ... GMT-8.
* -replace- is added to estout so that re-running the script overwrites the
* existing file, as the esttab calls for Table 1 already do.
forvalues tz = 5/8 {
    local m = `tz' - 4
    reg ENP_cst c.yr vv1million popmillion  i.dummies if timezones==`tz'&south==0, cluster(dummies)
    estimates store m`m', title(GMT-`tz')
}
estout m1 m2 m3 m4 using TableA5_The_Effect_of_Distance_on_Number_Parties.text, cells(b(star fmt(4)) se(par fmt(5))) legend label stats(r2) varlabels(_cons Constant) replace
